#!/usr/bin/env python
# -*- encoding: utf-8 -*-
'''
@Time    :   2021/05/19 14:13:28
@Author  :   Leo Wood 
@Contact :   leowood@foxmail.com
'''


import pandas as pd
import random
# Fixed seed so that the random train/test split is repeatable between runs.
random.seed(7)


def _encode_moves(frame, class_dict):
    """Return a copy of ``frame`` with its 'move' labels mapped to class ids.

    Working on a copy avoids assigning into a slice of the source DataFrame
    (pandas' SettingWithCopyWarning).  Labels that are missing from
    ``class_dict`` become ``<NA>``; the nullable ``Int64`` dtype keeps the
    remaining ids integral, so they are written as ``0`` rather than ``0.0``.
    """
    encoded = frame.copy()
    encoded['move'] = encoded['move'].map(class_dict).astype('Int64')

    unmapped = int(encoded['move'].isna().sum())
    if unmapped:
        print('warning: %d row(s) have a move label missing from class_dict'
              % unmapped)
    return encoded


def main(source='result_move.csv', test_size=500, out_dir='.'):
    """Split the rows of ``source`` into train/test/dev TSV files.

    The CSV must provide the columns 'id', 'move' and 'text'.  ``test_size``
    distinct ids are drawn at random; every row carrying one of those ids
    goes to the test split and all remaining rows go to the train split, so
    rows sharing an id never end up on both sides.  Each output file has two
    tab-separated columns (integer class id, text) and no header row.

    Args:
        source: Path of the input CSV file.
        test_size: Number of distinct ids to hold out for the test split.
        out_dir: Directory in which train.tsv, test.tsv and dev.tsv are
            written.

    Raises:
        ValueError: If ``source`` contains fewer than ``test_size`` distinct
            ids.
    """
    import os

    class_dict = {'OBJECTIVE': 0, 'METHODS': 1, 'RESULTS': 2,
                  'CONCLUSIONS': 3, 'BACKGROUND': 4}

    df = pd.read_csv(source)
    print(df)

    # Unique ids in order of first appearance.  Building this list from a
    # set() would make the sample depend on hash ordering, which is
    # randomised per process for strings and would defeat the fixed seed.
    ids = df['id'].drop_duplicates().tolist()
    if test_size > len(ids):
        raise ValueError(
            'cannot sample %d test ids from only %d distinct ids'
            % (test_size, len(ids)))
    test_ids = random.sample(ids, test_size)

    in_test = df['id'].isin(test_ids)
    test_df = _encode_moves(df[in_test], class_dict)
    # `~` is the boolean complement of the mask (unary minus is arithmetic).
    train_df = _encode_moves(df[~in_test], class_dict)

    columns = ['move', 'text']
    tsv_options = {'sep': '\t', 'index': False, 'header': False}

    train_df[columns].to_csv(os.path.join(out_dir, 'train.tsv'), **tsv_options)
    test_df[columns].to_csv(os.path.join(out_dir, 'test.tsv'), **tsv_options)
    # The dev split is written from the same rows as the test split.
    test_df[columns].to_csv(os.path.join(out_dir, 'dev.tsv'), **tsv_options)


if __name__ == '__main__':
    main()




